Diffusion Models

Diffusion Models

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from scipy.stats import kstest
from IPython.display import HTML

# === Parameters ===
np.random.seed(42)        # fixed seed: every run draws the same samples
n_samples = 10000         # number of data points pushed through the chain
T = 200                   # number of diffusion steps
beta_start = 1e-4         # beta used at the first step
beta_end = 0.1            # beta used at the last step

# === Initial Data: piecewise uniform with gaps ===
# Three disjoint uniform segments; the third one takes the remainder so the
# segment sizes always add up to n_samples.
n1 = n2 = n_samples // 3
n3 = n_samples - n1 - n2
segments = ((-10, -6, n1), (-2, 2, n2), (6, 10, n3))
initial_data = np.concatenate(
    [np.random.uniform(lo, hi, size=count) for lo, hi, count in segments]
)

# === Noise schedule ===
# beta grows linearly from beta_start to beta_end over the T steps.
beta = np.linspace(beta_start, beta_end, T)
alpha = 1.0 - beta
bar_alpha = np.cumprod(alpha)  # running product of alpha; the loop below does not use it

# === Precompute forward diffusion steps ===
# diffusion_steps[0] is a copy of the initial data; entry k is the state after
# k applications of  x_t = sqrt(alpha_t) * x_{t-1} + sqrt(beta_t) * eps,
# with fresh standard normal eps drawn at every step.
diffusion_steps = [initial_data.copy()]
for a_t, b_t in zip(alpha, beta):
    eps = np.random.randn(n_samples)
    diffusion_steps.append(np.sqrt(a_t) * diffusion_steps[-1] + np.sqrt(b_t) * eps)

# === Test normality at each step (KS test) ===
# One p-value per entry of diffusion_steps (index 0 is the initial data).
# Each sample is compared against a normal distribution whose mean and
# standard deviation are computed from that same sample.
# NOTE(review): because the parameters are estimated from the tested sample,
# the p-value is best read as a rough indicator rather than an exact one.
normality_p = [
    kstest(sample, 'norm', args=(sample.mean(), sample.std())).pvalue
    for sample in diffusion_steps
]

# === Set up histogram bins & Gaussian PDF ===
x_min, x_max = -12, 12
bins = np.linspace(x_min, x_max, 60)               # 60 edges -> 59 bins
bin_centers = (bins[:-1] + bins[1:]) / 2           # midpoint of each bin
x_grid = np.linspace(x_min, x_max, 500)
gauss_pdf = np.exp(-0.5 * x_grid**2) / np.sqrt(2 * np.pi)   # standard normal density

# === Create figure & initial plot ===
fig, ax = plt.subplots(figsize=(8, 5))
ax.set(xlim=(x_min, x_max), ylim=(0, 0.45), xlabel="Value", ylabel="Density")
ax.grid(True)

# initial histogram: drawn as bars so the animation can later change their heights
hist_vals, _ = np.histogram(initial_data, bins=bins, density=True)
bar_width = bins[1] - bins[0]
bars = ax.bar(bin_centers, hist_vals, width=bar_width, alpha=0.6, color='orange')

# overlay final Gaussian curve (static reference, never updated)
(line_pdf,) = ax.plot(x_grid, gauss_pdf, 'r--', lw=2, label='Standard Gaussian')
ax.legend(loc='upper right')

# Both captions sit above the axes, centred, in axes coordinates.
caption_style = dict(transform=ax.transAxes, ha="center", va="bottom")

# equation text (constant)
equation_text = ax.text(
    0.5, 1.08,
    r"$x_t = \sqrt{1-\beta_t}\,x_{t-1} + \sqrt{\beta_t}\,\epsilon,\quad \epsilon\sim\mathcal{N}(0,I)$",
    fontsize=12, **caption_style
)

# subtitle text (updates each frame); starts empty
subtitle_text = ax.text(0.5, 1.02, "", fontsize=10, **caption_style)
# === Animation update function ===
def update(frame):
    """Redraw the histogram bars and the subtitle for one animation frame.

    Args:
        frame: Index into ``diffusion_steps`` and ``normality_p``
            (0 is the initial data).

    Returns:
        A tuple holding every bar patch followed by the subtitle text
        artist, i.e. the artists this call modified.
    """
    densities = np.histogram(diffusion_steps[frame], bins=bins, density=True)[0]
    for patch, height in zip(bars, densities):
        patch.set_height(height)

    pval = normality_p[frame]
    subtitle_text.set_text(f"Step {frame}/{T}  |  KS p-value = {pval:.3f}; close to 1 → Gaussian; close to 0 → Not Gaussian")
    return tuple(bars) + (subtitle_text,)

# === Create Animation ===
# One frame per stored diffusion state (initial data included), 50 ms apart.
n_frames = len(diffusion_steps)
ani = FuncAnimation(fig, update, frames=n_frames, interval=50, blit=True)

# prevent static plot from showing: only the animation player should appear
plt.close(fig)

# Display in Jupyter: the cell's final expression renders the JS/HTML player
HTML(ani.to_jshtml())
import numpy as np
import matplotlib.pyplot as plt

# === Parameters ===
np.random.seed(42)    # fixed seed so the figure is reproducible
n_samples = 10000     # number of data points
T = 200               # number of diffusion steps
beta_start = 1e-4     # smallest beta used by the schedules
beta_end = 0.1        # largest beta used by the schedules

# === Initial Data: piecewise uniform with gaps ===
# Equal thirds for the first two segments; the last one takes the remainder.
n1 = n2 = n_samples // 3
n3 = n_samples - n1 - n2
initial_data = np.concatenate([
    np.random.uniform(low, high, size=count)
    for low, high, count in ((-10, -6, n1), (-2, 2, n2), (6, 10, n3))
])

# === Define Beta Schedules ===
# Insertion order fixes the row order of the figure.
schedules = {}
schedules['Linear'] = np.linspace(beta_start, beta_end, T)
# Linear in sqrt(beta), then squared: same end points, slower start.
schedules['Quadratic'] = np.linspace(np.sqrt(beta_start), np.sqrt(beta_end), T) ** 2
schedules['Constant'] = np.full(T, beta_end)

# === Timesteps to visualize ===
timesteps = [0, T // 2, T]  # start, mid, end

# === Histogram bins ===
x_lo, x_hi = -12, 12
bins = np.linspace(x_lo, x_hi, 60)

# === Standard normal PDF for overlay ===
x_grid = np.linspace(x_lo, x_hi, 500)
gauss_pdf = np.exp(-0.5 * x_grid**2) / np.sqrt(2 * np.pi)

# === Colors matching the screenshot style ===
bar_color, edge_color, gauss_color = "#F3B762", "#6D4301", "r"

# === Create subplots: add extra col for beta curves ===
# One row per beta schedule; column 0 shows the schedule itself and the
# remaining columns show the data histogram at each entry of `timesteps`.
#
# The y-axes are deliberately NOT shared. With a figure-wide shared y-axis,
# the ylim set on the first histogram below is propagated to every panel:
# the beta curves end up on the density scale, and the standard normal
# overlay (peak 1/sqrt(2*pi) ~= 0.4) is cut off at 0.2.
# squeeze=False keeps `axes` two-dimensional even for a single schedule, so
# axes[i, j] indexing is always valid.
fig, axes = plt.subplots(
    nrows=len(schedules),
    ncols=len(timesteps) + 1,
    figsize=(15, 8),
    sharey=False,
    squeeze=False
)
last_row = len(schedules) - 1

# === Plot Beta schedules column (leftmost) ===
for i, (name, beta) in enumerate(schedules.items()):
    ax = axes[i, 0]
    ax.plot(range(1, T+1), beta, color="C0")   # steps are numbered 1..T
    ax.set_xlim(0, T)
    ax.set_title(f"{name}\nBeta Schedule")
    if i == last_row:
        ax.set_xlabel("Diffusion Step")
    ax.set_ylabel("Beta Value")

# === Plot histograms for each schedule/timestep ===
for i, (name, beta) in enumerate(schedules.items()):
    # Simulate forward diffusion for this beta schedule:
    #   x_t = sqrt(1 - beta_t) * x_{t-1} + sqrt(beta_t) * eps,  eps ~ N(0, 1)
    # diffusion[k] is the state after k steps (diffusion[0] = initial data).
    diffusion = [initial_data.copy()]
    for t in range(T):
        x_prev = diffusion[-1]
        noise = np.random.randn(n_samples)
        x_next = np.sqrt(1 - beta[t]) * x_prev + np.sqrt(beta[t]) * noise
        diffusion.append(x_next)

    for j, t in enumerate(timesteps):
        ax = axes[i, j+1]   # +1 to account for leftmost beta plot
        data = diffusion[t]
        # Histogram (not line plot)
        ax.hist(data, bins=bins, density=True,
                color=bar_color, edgecolor=edge_color, alpha=0.85)
        # At final timestep overlay Gaussian
        if t == T:
            ax.plot(x_grid, gauss_pdf, gauss_color+"--", lw=2, label="Std Gaussian")
            ax.legend()
        ax.set_xlim(-12, 12)
        ax.set_title(f"{name}\nstep {t}")
        if j == 0:
            ax.set_ylabel("Density")
        if i == last_row:
            ax.set_xlabel("Value")
        # Only set ylim for the very first histogram (top-left only); every
        # other panel autoscales because the y-axes are independent.
        if i == 0 and j == 0:
            ax.set_ylim(0, 0.2)

plt.suptitle("Effect of Beta Schedule on Forward Diffusion", y=1.03, fontsize=16)
plt.tight_layout()
plt.show()
../_images/88d872da4022b892449e079902d3503b013abde29f2f029feae0f5cbf930c138.png